{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "给train_plc添加id和sort_col,因为tsfresh提取特征需要使用id和sort_col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import joblib\n",
    "import datetime as dt\n",
    "import os\n",
    "\n",
    "from joblib import Parallel,delayed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def assign_id_single_sensor(sensor_seg_num, sensor):\n",
    "    '''\n",
    "    描述:\n",
    "        为一个sensor文件增加一列id（id从1到sensor_seg_num）\n",
    "    参数：\n",
    "        sensor_seg_num(int):采样数据的分段数\n",
    "        sensor(DataFrame):采样数据\n",
    "    '''\n",
    "    sensor_len = sensor.shape[0]\n",
    "    sensor_seg_len = int(sensor_len / sensor_seg_num)\n",
    "    sensor_res = sensor_len % sensor_seg_num\n",
    "    # create column id\n",
    "    a = np.ones(sensor_seg_len + 1)\n",
    "    b = np.ones(sensor_seg_len)\n",
    "    a_dup = a.reshape(-1,a.shape[0]).repeat(sensor_res,axis=0)\n",
    "    A = a_dup*np.arange(1,sensor_res+1).reshape(-1,1)\n",
    "    A = A.ravel()\n",
    "    b_dup = b.reshape(-1,b.shape[0]).repeat((sensor_seg_num - sensor_res),axis=0)\n",
    "    B = b_dup*np.arange(sensor_res+1,sensor_seg_num+1).reshape(-1,1)\n",
    "    B = B.ravel()\n",
    "    C = np.hstack((A,B)).astype(int)\n",
    "    sensor['id'] = C\n",
    "    return sensor\n",
    "\n",
    "def assign_sort_col(df):\n",
    "    '''\n",
    "    描述：\n",
    "        为df文件添加一列‘sort_col’\n",
    "    '''\n",
    "    df['sort_col'] = df.index\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def assign_id_all_sensor(data_no):\n",
    "    '''\n",
    "    描述：\n",
    "        \n",
    "    参数：\n",
    "        data_no（int）: 例如：1\n",
    "        \n",
    "    '''\n",
    "    plc_path = './train_plc_RULR/train_0%s_plc.lz4'%data_no\n",
    "    sensor_input_dir = './sensors/0%s/'%data_no\n",
    "    sensor_output_folder = './sensors_id_sort/0%s'%data_no\n",
    "\n",
    "    if not os.path.exists(sensor_output_folder):\n",
    "        os.makedirs(sensor_output_folder)\n",
    "    # 读入plc以获取plc对应csv_no数量   \n",
    "    data_plc = joblib.load(plc_path)\n",
    "    plc_sample_points = data_plc['csv_no'].value_counts().sort_index() # index代表第几个sensor文件\n",
    "    \n",
    "    def assign_id_basis_func(idx):\n",
    "        input_path = os.path.join(sensor_input_dir, '%d.lz4'%idx)\n",
    "        sensor = joblib.load(input_path)\n",
    "        tmp = assign_id_single_sensor(plc_sample_points[idx], sensor)\n",
    "        tmp = assign_sort_col(tmp)\n",
    "        output_path = os.path.join(sensor_output_folder, '%d.lz4'%idx)\n",
    "        joblib.dump(tmp, output_path, compress='lz4')\n",
    "\n",
    "    Parallel(n_jobs=len(plc_sample_points.index), verbose=10)(delayed(assign_id_basis_func)(idx) for idx in plc_sample_points.index)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=48)]: Using backend LokyBackend with 48 concurrent workers.\n",
      "[Parallel(n_jobs=48)]: Done   3 out of  48 | elapsed:   30.6s remaining:  7.6min\n",
      "[Parallel(n_jobs=48)]: Done   8 out of  48 | elapsed:   41.5s remaining:  3.5min\n",
      "[Parallel(n_jobs=48)]: Done  13 out of  48 | elapsed:   44.1s remaining:  2.0min\n",
      "[Parallel(n_jobs=48)]: Done  18 out of  48 | elapsed:   46.0s remaining:  1.3min\n",
      "[Parallel(n_jobs=48)]: Done  23 out of  48 | elapsed:   48.0s remaining:   52.2s\n",
      "[Parallel(n_jobs=48)]: Done  28 out of  48 | elapsed:   48.5s remaining:   34.7s\n",
      "[Parallel(n_jobs=48)]: Done  33 out of  48 | elapsed:   49.1s remaining:   22.3s\n",
      "[Parallel(n_jobs=48)]: Done  38 out of  48 | elapsed:   49.6s remaining:   13.0s\n",
      "[Parallel(n_jobs=48)]: Done  43 out of  48 | elapsed:   50.4s remaining:    5.9s\n",
      "[Parallel(n_jobs=48)]: Done  48 out of  48 | elapsed:   52.5s remaining:    0.0s\n",
      "[Parallel(n_jobs=48)]: Done  48 out of  48 | elapsed:   52.5s finished\n",
      "[Parallel(n_jobs=48)]: Using backend LokyBackend with 48 concurrent workers.\n",
      "[Parallel(n_jobs=48)]: Done   3 out of  48 | elapsed:   20.8s remaining:  5.2min\n",
      "[Parallel(n_jobs=48)]: Done   8 out of  48 | elapsed:   35.6s remaining:  3.0min\n",
      "[Parallel(n_jobs=48)]: Done  13 out of  48 | elapsed:   36.9s remaining:  1.7min\n",
      "[Parallel(n_jobs=48)]: Done  18 out of  48 | elapsed:   37.7s remaining:  1.0min\n",
      "[Parallel(n_jobs=48)]: Done  23 out of  48 | elapsed:   41.0s remaining:   44.5s\n",
      "[Parallel(n_jobs=48)]: Done  28 out of  48 | elapsed:   41.0s remaining:   29.3s\n",
      "[Parallel(n_jobs=48)]: Done  33 out of  48 | elapsed:   41.0s remaining:   18.6s\n",
      "[Parallel(n_jobs=48)]: Done  38 out of  48 | elapsed:   42.5s remaining:   11.2s\n",
      "[Parallel(n_jobs=48)]: Done  43 out of  48 | elapsed:   54.0s remaining:    6.3s\n",
      "[Parallel(n_jobs=48)]: Done  48 out of  48 | elapsed:   54.1s remaining:    0.0s\n",
      "[Parallel(n_jobs=48)]: Done  48 out of  48 | elapsed:   54.1s finished\n",
      "[Parallel(n_jobs=37)]: Using backend LokyBackend with 37 concurrent workers.\n",
      "[Parallel(n_jobs=37)]: Done   4 out of  37 | elapsed:   30.6s remaining:  4.2min\n",
      "[Parallel(n_jobs=37)]: Done   8 out of  37 | elapsed:   32.4s remaining:  2.0min\n",
      "[Parallel(n_jobs=37)]: Done  12 out of  37 | elapsed:   34.3s remaining:  1.2min\n",
      "[Parallel(n_jobs=37)]: Done  16 out of  37 | elapsed:   34.8s remaining:   45.6s\n",
      "[Parallel(n_jobs=37)]: Done  20 out of  37 | elapsed:   35.6s remaining:   30.2s\n",
      "[Parallel(n_jobs=37)]: Done  24 out of  37 | elapsed:   36.3s remaining:   19.7s\n",
      "[Parallel(n_jobs=37)]: Done  28 out of  37 | elapsed:   37.1s remaining:   11.9s\n",
      "[Parallel(n_jobs=37)]: Done  32 out of  37 | elapsed:   38.1s remaining:    6.0s\n",
      "[Parallel(n_jobs=37)]: Done  37 out of  37 | elapsed:   38.5s finished\n"
     ]
    }
   ],
   "source": [
    "assign_id_all_sensor(1)\n",
    "assign_id_all_sensor(2)\n",
    "assign_id_all_sensor(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
